"""
将python数据输入到pyspark中
"""
from pyspark import SparkConf, SparkContext

# Build the SparkContext object (master "local[*]", app name "test_spark_app").
conf = SparkConf().setMaster("local[*]").setAppName("test_spark_app")
sc = SparkContext(conf=conf)

# Everything that uses the context runs inside try/finally so the context is
# always stopped, even if reading the file or collecting the RDD raises
# (for example when the hard-coded path below does not exist on this machine).
try:
    # 1. Use parallelize to feed Python data objects into Spark, turning them
    #    into RDD objects. Kept commented out as reference examples.
    # rdd1 = sc.parallelize([1, 2, 3, 4, 5])
    # rdd2 = sc.parallelize((1, 2, 3, 4, 5))
    # rdd3 = sc.parallelize("abcdefg")
    # rdd4 = sc.parallelize({1, 2, 3, 4, 5})
    # rdd5 = sc.parallelize({"k1": "v1", "k2": "v2"})

    # Use collect() to fetch the contents of each RDD object.
    # print(rdd1.collect())
    # print(rdd2.collect())
    # print(rdd3.collect())
    # print(rdd4.collect())
    # print(rdd5.collect())

    # 2. Use sc.textFile to read file data into Spark as an RDD object.
    # NOTE(review): this is an absolute, machine-specific path — adjust it
    # before running elsewhere.
    text_path = "/Users/dongjian/Documents/学习/python-learn-资料/资料/第15章资料/资料/hello.txt"
    rdd_text = sc.textFile(text_path)
    # Print the RDD contents; the original author notes the output is per line.
    print(rdd_text.collect())
finally:
    # Stop the SparkContext to signal that the Spark program has finished.
    sc.stop()
